#------------------------------------------------------------------------------
# This script converts raw SAS (.sas7bdat) data files to .fst files.
# We obtained the raw ACS PUMS microdata sample from the following website:
# https://www2.census.gov/programs-surveys/acs/data/pums/
#==============================================================================
    
# create function for reading all xls sheets
read_excel_allsheets <- function(filename, tibble = FALSE) {
  sheets <- excel_sheets(filename)
  x <-
    lapply(sheets, function(X)
      read_excel(filename, sheet = X))
  if (!tibble)
    x <- lapply(x, as.data.frame)
  names(x) <- sheets
  return(x)
}

#d) create function for converting all files from sas7bdat, xls, .dat, .sav  to .fst
file_to_fst <- function(in.path, out.path, filetype) {
  #i) create directory
      dir.create(out.path, showWarnings = F)
  
  #ii) identify old and new file names
      if (filetype == "sas7bdat") {
        raw.file.names <- list.files(in.path, pattern = "sas7bdat$")
        new.file.names <- gsub("sas7bdat", "fst", raw.file.names)
      }
      if (filetype == "xls") {
        raw.file.names <- list.files(in.path, pattern = "xls$")
        new.file.names <- gsub("xls", "fst", raw.file.names)
      }
      if (filetype == "dat") {
        raw.file.names <- list.files(in.path, pattern = "dat$")
        new.file.names <- gsub("dat", "fst", raw.file.names)
      }
      if (filetype == "sav") {
        raw.file.names <- list.files(in.path, pattern = "sav$")
        new.file.names <- gsub("sav", "fst", raw.file.names)
      }
  
  #iii) start loop
      for (i in 1:length(raw.file.names)) {
        #load data
            if (filetype == "sas7bdat") {
              df.temp <- data.table(read_sas(file.path(in.path, raw.file.names[i])))
            }
            if (filetype == "xls") {
              df.temp <- (read_excel_allsheets(file.path(in.path, raw.file.names[i])))
            }
            if (filetype == "sav") {
              df.temp <- data.table(read_spss(file.path(in.path, raw.file.names[i])))
            }
        #save data as fst
            if (filetype == "xls") {
              for (j in 1:length(df.temp)) {
                file.name2 <-
                  gsub(".fst",
                       paste("_", j, "_", names(df.temp[j]), ".fst", sep = ""),
                       new.file.names[i])
                write_fst(df.temp[[j]], file.path(out.path, file.name2))
              }
            } else{
              write_fst(df.temp, file.path(out.path, new.file.names[i]))
            }
            print(paste(100 * round(i / length(raw.file.names), 2), "%", sep = ""))
      }
}

# microACS
file_to_fst(
  filetype = "sas7bdat",
  in.path = file.path(rawdata_path, "ACS_PUMS"),
  out.path = file.path(rawdata_path, "ACS_PUMS")
)

# NVDRS
file_to_fst(
  filetype = "sas7bdat",
  in.path = file.path(rawdata_path, "NVDRS"),
  out.path = file.path(rawdata_path, "NVDRS")
)

